In [1]:
import pickle
import pumpp
import numpy as np
import librosa
import os
from glob import glob

import tensorflow as tf
import keras as K
import pescador
import pandas as pd

import jams
from tqdm import tqdm_notebook as tqdm
from sklearn.model_selection import GroupShuffleSplit


Using TensorFlow backend.

In [2]:
SEED = 20170401

In [3]:
DATA_DIR = '/home/bmcfee/working/chords/pump'

In [4]:
# Reload the pump
with open('/home/bmcfee/working/chords/pump.pkl', 'rb') as fd:
    pump = pickle.load(fd)

In [5]:
# Calculate the number of frames in an 8-second training patch

MAX_SAMPLES = 128

duration = 8.0

n_frames = librosa.time_to_frames(duration,
                                  sr=pump['cqt'].sr,
                                  hop_length=pump['cqt'].hop_length)[0]

# The sampler draws up to MAX_SAMPLES random patches of n_frames frames per track
sampler = pump.sampler(MAX_SAMPLES, n_frames, random_state=SEED)

In [6]:
n_frames


Out[6]:
86
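
86 frames over an 8-second patch is about 10.75 frames per second, i.e. a hop of roughly 93 ms (consistent with, for example, sr=44100 and hop_length=4096).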

In [7]:
def data_sampler(fname, sampler):
    '''Stream patches from a single npz feature file.'''

    # Load eagerly into a plain dict so the file handle can be
    # closed before sampling begins
    data = np.load(fname)
    d2 = dict(data)
    data.close()
    data = d2
    
    yield from sampler(data)

In [8]:
def data_generator(tracks, sampler, k, batch_size=16, augmentation=False, weights=None, **kwargs):
    '''Build a pescador mux over the given tracks, optionally including
    augmented variants (track.*.npz) and per-track sampling weights.'''
    
    seeds = []
    pool_weights = []
    
    for track in tracks:
        
        # One streamer per original track
        fname = os.path.join(DATA_DIR, os.path.extsep.join([track, 'npz']))
        seeds.append(pescador.Streamer(data_sampler, fname, sampler))
        
        if weights is not None:
            pool_weights.append(weights.loc[track])
            
        # Augmented copies inherit the original track's weight
        if augmentation:
            for fname in sorted(glob(os.path.join(DATA_DIR, '{}.*.npz'.format(track)))):
                seeds.append(pescador.Streamer(data_sampler, fname, sampler))
                if weights is not None:
                    pool_weights.append(weights.loc[track])
        
    # Send it all to a mux: k streams active at once, selected in
    # proportion to pool_weights (uniform if None)
    if not pool_weights:
        pool_weights = None
        
    mux = pescador.Mux(seeds, k, pool_weights=pool_weights, **kwargs)
    
    if batch_size == 1:
        return mux
    else:
        return pescador.BufferedStreamer(mux, batch_size)
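
In [ ]:
# Quick smoke test of the generator pipeline. The track ID below is a
# placeholder; substitute any track whose features exist under DATA_DIR.
gen = data_generator(['TRAAAAA128F425A814'], sampler, k=1, batch_size=4)
for cqt, chord in gen.tuples('cqt/mag', 'chord_tag/chord'):
    print(cqt.shape, chord.shape)   # expected shapes: roughly (4, 86, 216, 1) and (4, 86, 1)
    break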

In [9]:
def wrap(gen):
    '''Repackage flat sample tuples as ([input1, input2], [outputs...])
    for a multi-input/multi-output model. (Unused by the simple model below.)'''
    
    for batch in gen:
        yield [batch[0], batch[1]], list(batch[2:])

In [81]:
from collections import defaultdict
import mir_eval

def estimate_class_annotation(ann, op, quality_only):
    '''Accumulate the total duration of each (simplified) chord label
    in a single annotation.'''
    
    weights = defaultdict(float)
    
    intervals, values = ann.data.to_interval_values()
    
    for ival, chord in zip(intervals, values):
        chord = op.simplify(chord)
        
        if quality_only:
            chord = reduce_chord(chord)
            
        weights[chord] += ival[1] - ival[0]
    
    # np.max(intervals) is the end of the last interval: the annotated duration
    return weights, np.max(intervals)
    
def reduce_chord(c):
    '''Strip the root, keeping only the chord quality.'''
    
    if ':' in c:
        return c[c.rindex(':')+1:]
    else:
        return c
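
# e.g. reduce_chord('A:maj7') -> 'maj7'; labels without a quality
# (like 'N' or 'X') are returned unchanged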

def estimate_class_weights(tracks, op, pseudo=1e-2, quality_only=True,
                           refs='/home/bmcfee/data/eric_chords/references_v2/'):
    '''Estimate the corpus-level chord (or chord-quality) distribution,
    with a small pseudo-count to keep unseen classes away from zero.'''
    
    seeds = [os.path.join(refs, os.path.extsep.join([track, 'jams'])) for track in tracks]
    
    vocab = op.vocabulary()
    
    if quality_only:
        vocab = set([reduce_chord(c) for c in vocab])
        
    weights = {k: pseudo for k in vocab}
    
    total = 0.0
    
    for jam_in in tqdm(seeds):
        jam = jams.load(jam_in, validate=False)
        for ann in jam.annotations['chord']:
            weights_i, duration_i = estimate_class_annotation(ann, op, quality_only)
            total += duration_i
            for k in weights_i:
                weights[k] += weights_i[k]

    # Normalize accumulated durations into a distribution over the vocabulary
    for k in weights:
        weights[k] /= total
    
    return weights

def weight_track(track, class_weights, op, quality_only=True, refs='/home/bmcfee/data/eric_chords/references_v2/',
                 aggregate=np.max, temporal=True):
    '''Score a track by the exponentiated cross-entropy between its chord
    distribution and the corpus distribution: rare chords => large weight.'''
    
    jam_in = os.path.join(refs, os.path.extsep.join([track, 'jams']))
    jam = jams.load(jam_in, validate=False)
    
    weight = []
    for ann in jam.annotations['chord']:
        weights_i, duration_i = estimate_class_annotation(ann, op, quality_only)
        
        phat = 0.0
        if not temporal:
            weights_i = set(weights_i.keys())
            
        for k in weights_i:
            if temporal:
                # Duration-weighted average log-probability under the corpus model
                phat += weights_i[k] / duration_i * np.log(class_weights[k])
            else:
                # Uniform average over the distinct classes present
                phat += np.log(class_weights[k]) / len(weights_i)
                
        weight.append(np.exp(-phat))
        
    return aggregate(weight)

def weight_tracks(tracks, *args, **kwargs):
    '''Compute weight_track for every track, returned as a pandas Series.'''
    
    weights = {}
    for track in tqdm(tracks):
        weights[track] = weight_track(track, *args, **kwargs)
        
    return pd.Series(data=weights)
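
The per-track weights computed above implement importance sampling: with temporal=True, a track's weight is exp(-Σ_k p̂_k log q_k), the exponentiated cross-entropy between its duration-weighted chord distribution p̂ and the corpus distribution q; with temporal=False, the average is taken uniformly over the distinct classes present. Either way, tracks dominated by common chords get small weights and tracks containing rare chords get large ones, so the training mux over-samples rare classes.

In [ ]:
# Worked example with hypothetical numbers. Suppose the corpus distribution is
#   q = {'maj': 0.70, 'min': 0.25, 'hdim7': 0.05}
# With temporal=False, a track containing only {maj, min} gets
#   w = exp(-(log 0.70 + log 0.25) / 2) ~= 2.39
# while a track containing {maj, hdim7} gets
#   w = exp(-(log 0.70 + log 0.05) / 2) ~= 5.35
# so the track featuring the rare quality is sampled about twice as often.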

Construct the model


In [11]:
pump.fields


Out[11]:
{'chord_struct/bass': Tensor(shape=(None, 1), dtype=<class 'int'>),
 'chord_struct/pitch': Tensor(shape=(None, 12), dtype=<class 'bool'>),
 'chord_struct/root': Tensor(shape=(None, 1), dtype=<class 'int'>),
 'chord_tag/chord': Tensor(shape=(None, 1), dtype=<class 'int'>),
 'cqt/mag': Tensor(shape=(None, 216, 1), dtype=<class 'numpy.float32'>)}

In [12]:
len(pump['chord_tag'].vocabulary())


Out[12]:
170
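
That is 170 chord classes: 14 qualities across 12 roots, plus the no-chord (N) and out-of-vocabulary (X) symbols.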

In [13]:
pump['cqt'].layers()


Out[13]:
{'cqt/mag': <tf.Tensor 'cqt/mag:0' shape=(?, ?, 216, 1) dtype=float32>}

In [ ]:
LAYERS = pump['cqt'].layers()

x = LAYERS['cqt/mag']

# Per-bin input normalization
b = K.layers.BatchNormalization()(x)

# 5x5 convolution over (time, frequency) with a single output channel
c0 = K.layers.Convolution2D(1, (5, 5), padding='same',
                            activation='relu',
                            data_format='channels_last')(b)

# 1x216 convolution collapses the full frequency axis into 36 channels
c1 = K.layers.Convolution2D(36, (1, int(c0.shape[2])), padding='valid', activation='relu',
                            data_format='channels_last')(c0)

# Drop the singleton frequency dimension: (batch, time, 1, 36) -> (batch, time, 36)
r1 = K.layers.Lambda(lambda x: K.backend.squeeze(x, axis=2))(c1)

# Bidirectional recurrence over time
rs = K.layers.Bidirectional(K.layers.GRU(256, return_sequences=True))(r1)

# Per-frame softmax over the chord vocabulary
p0 = K.layers.Dense(len(pump['chord_tag'].vocabulary()), activation='softmax',
                    bias_regularizer=K.regularizers.l2())

p1 = K.layers.TimeDistributed(p0)(rs)

model = K.models.Model(x, p1)
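
The result is a convolutional-recurrent tagger: batch normalization, a 5×5 convolution with one output channel, a 1×216 convolution that collapses the frequency axis into 36 channels, a bidirectional GRU over time, and a per-frame softmax over the chord vocabulary.

In [ ]:
# Shape trace for one 8-second training patch (assuming n_frames = 86):
#   cqt/mag input:                      (batch, 86, 216, 1)
#   5x5 conv, 1 filter, 'same':         (batch, 86, 216, 1)
#   1x216 conv, 36 filters, 'valid':    (batch, 86, 1, 36)
#   squeeze(axis=2):                    (batch, 86, 36)
#   Bidirectional GRU(256):             (batch, 86, 512)
#   TimeDistributed softmax:            (batch, 86, 170)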

Run a train-test split


In [ ]:
index = pd.read_json('/home/bmcfee/working/chords/artist_index.json', typ='series')

# Group splits by artist, so no artist appears on both sides of a split
splitter_tt = GroupShuffleSplit(n_splits=1, random_state=SEED)
for train_, test in splitter_tt.split(index, groups=list(index)):
    idx_train_ = index.iloc[train_]
    idx_test = index.iloc[test]
    splitter_tv = GroupShuffleSplit(n_splits=1, test_size=0.25, random_state=SEED)
    
    for train, val in splitter_tv.split(idx_train_, groups=list(idx_train_)):
        idx_train = idx_train_.iloc[train]
        idx_val = idx_train_.iloc[val]
    
        # Corpus-level chord-quality distribution, then per-track importance weights
        chord_weights = estimate_class_weights(idx_train.index, pump['chord_tag'],
                                               quality_only=True)

        train_weights = weight_tracks(idx_train.index, chord_weights, pump['chord_tag'],
                                      quality_only=True, temporal=False)
        
        # Training mux: 1024 simultaneously active tracks, weighted sampling
        gen_train = data_generator(train_weights.index, sampler, 1024, augmentation=True, 
                                   lam=8, batch_size=32, revive=True, weights=train_weights,
                                   random_state=SEED)
        
        gen_val = data_generator(idx_val.index, sampler, len(idx_val),
                                 batch_size=32, revive=True, random_state=SEED)
        

        model.compile(K.optimizers.Adam(),
                      'sparse_categorical_crossentropy',
                      metrics=['sparse_categorical_accuracy'])

        # 512 steps per epoch, up to 100 epochs
        model.fit_generator(gen_train.tuples('cqt/mag', 'chord_tag/chord'), 512, 100,
                            validation_data=gen_val.tuples('cqt/mag', 'chord_tag/chord'),
                            validation_steps=1024,
                            callbacks=[K.callbacks.ModelCheckpoint('/home/bmcfee/working/chords/model_simple_ckpt.pkl',
                                                                   save_best_only=True,
                                                                   verbose=1),
                                       K.callbacks.ReduceLROnPlateau(patience=5, verbose=1),
                                       K.callbacks.EarlyStopping(patience=15, verbose=0)])
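
Each epoch draws 512 batches of 32 eight-second patches from the training mux. The checkpoint callback keeps the best model by validation loss, the learning rate drops after 5 epochs without improvement, and early stopping fires after 15.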



Epoch 1/100
511/512 [============================>.] - ETA: 0s - loss: 2.2732 - sparse_categorical_accuracy: 0.4428Epoch 00000: val_loss improved from inf to 1.87402, saving model to /home/bmcfee/working/chords/model_simple_ckpt.pkl
512/512 [==============================] - 197s - loss: 2.2725 - sparse_categorical_accuracy: 0.4430 - val_loss: 1.8740 - val_sparse_categorical_accuracy: 0.5170
Epoch 2/100
511/512 [============================>.] - ETA: 0s - loss: 1.5320 - sparse_categorical_accuracy: 0.5838Epoch 00001: val_loss improved from 1.87402 to 1.79164, saving model to /home/bmcfee/working/chords/model_simple_ckpt.pkl
512/512 [==============================] - 197s - loss: 1.5328 - sparse_categorical_accuracy: 0.5836 - val_loss: 1.7916 - val_sparse_categorical_accuracy: 0.5389
Epoch 3/100
511/512 [============================>.] - ETA: 0s - loss: 1.4810 - sparse_categorical_accuracy: 0.5889Epoch 00002: val_loss improved from 1.79164 to 1.64881, saving model to /home/bmcfee/working/chords/model_simple_ckpt.pkl
512/512 [==============================] - 196s - loss: 1.4801 - sparse_categorical_accuracy: 0.5891 - val_loss: 1.6488 - val_sparse_categorical_accuracy: 0.5672
Epoch 4/100
511/512 [============================>.] - ETA: 0s - loss: 1.3776 - sparse_categorical_accuracy: 0.6123Epoch 00003: val_loss improved from 1.64881 to 1.62763, saving model to /home/bmcfee/working/chords/model_simple_ckpt.pkl
512/512 [==============================] - 197s - loss: 1.3770 - sparse_categorical_accuracy: 0.6124 - val_loss: 1.6276 - val_sparse_categorical_accuracy: 0.5633
Epoch 5/100
511/512 [============================>.] - ETA: 0s - loss: 1.3651 - sparse_categorical_accuracy: 0.6179Epoch 00004: val_loss improved from 1.62763 to 1.56387, saving model to /home/bmcfee/working/chords/model_simple_ckpt.pkl
512/512 [==============================] - 196s - loss: 1.3649 - sparse_categorical_accuracy: 0.6179 - val_loss: 1.5639 - val_sparse_categorical_accuracy: 0.5798
Epoch 6/100
511/512 [============================>.] - ETA: 0s - loss: 1.3395 - sparse_categorical_accuracy: 0.6206Epoch 00005: val_loss did not improve
512/512 [==============================] - 197s - loss: 1.3388 - sparse_categorical_accuracy: 0.6207 - val_loss: 1.5934 - val_sparse_categorical_accuracy: 0.5698
Epoch 7/100
511/512 [============================>.] - ETA: 0s - loss: 1.3422 - sparse_categorical_accuracy: 0.6198Epoch 00006: val_loss did not improve
512/512 [==============================] - 196s - loss: 1.3420 - sparse_categorical_accuracy: 0.6197 - val_loss: 1.6155 - val_sparse_categorical_accuracy: 0.5745
Epoch 8/100
511/512 [============================>.] - ETA: 0s - loss: 1.3393 - sparse_categorical_accuracy: 0.6186Epoch 00007: val_loss improved from 1.56387 to 1.46804, saving model to /home/bmcfee/working/chords/model_simple_ckpt.pkl
512/512 [==============================] - 198s - loss: 1.3395 - sparse_categorical_accuracy: 0.6186 - val_loss: 1.4680 - val_sparse_categorical_accuracy: 0.5898
Epoch 9/100
511/512 [============================>.] - ETA: 0s - loss: 1.3073 - sparse_categorical_accuracy: 0.6266Epoch 00008: val_loss did not improve
512/512 [==============================] - 202s - loss: 1.3079 - sparse_categorical_accuracy: 0.6264 - val_loss: 1.4898 - val_sparse_categorical_accuracy: 0.6072
Epoch 10/100
511/512 [============================>.] - ETA: 0s - loss: 1.2888 - sparse_categorical_accuracy: 0.6312Epoch 00009: val_loss did not improve
512/512 [==============================] - 203s - loss: 1.2886 - sparse_categorical_accuracy: 0.6313 - val_loss: 1.4973 - val_sparse_categorical_accuracy: 0.6061
Epoch 11/100
511/512 [============================>.] - ETA: 0s - loss: 1.2445 - sparse_categorical_accuracy: 0.6434Epoch 00010: val_loss did not improve
512/512 [==============================] - 203s - loss: 1.2444 - sparse_categorical_accuracy: 0.6433 - val_loss: 1.5481 - val_sparse_categorical_accuracy: 0.5956
Epoch 12/100
511/512 [============================>.] - ETA: 0s - loss: 1.2436 - sparse_categorical_accuracy: 0.6458Epoch 00011: val_loss did not improve
512/512 [==============================] - 203s - loss: 1.2445 - sparse_categorical_accuracy: 0.6456 - val_loss: 1.5897 - val_sparse_categorical_accuracy: 0.5906
Epoch 13/100
511/512 [============================>.] - ETA: 0s - loss: 1.2382 - sparse_categorical_accuracy: 0.6456Epoch 00012: val_loss did not improve
512/512 [==============================] - 204s - loss: 1.2380 - sparse_categorical_accuracy: 0.6455 - val_loss: 1.5488 - val_sparse_categorical_accuracy: 0.5962
Epoch 14/100
511/512 [============================>.] - ETA: 0s - loss: 1.2331 - sparse_categorical_accuracy: 0.6472Epoch 00013: val_loss did not improve

Epoch 00013: reducing learning rate to 0.00010000000474974513.
512/512 [==============================] - 204s - loss: 1.2330 - sparse_categorical_accuracy: 0.6472 - val_loss: 1.4856 - val_sparse_categorical_accuracy: 0.6018
Epoch 15/100
511/512 [============================>.] - ETA: 0s - loss: 1.2147 - sparse_categorical_accuracy: 0.6527Epoch 00014: val_loss improved from 1.46804 to 1.40302, saving model to /home/bmcfee/working/chords/model_simple_ckpt.pkl
512/512 [==============================] - 203s - loss: 1.2143 - sparse_categorical_accuracy: 0.6528 - val_loss: 1.4030 - val_sparse_categorical_accuracy: 0.6226
Epoch 16/100
511/512 [============================>.] - ETA: 0s - loss: 1.2207 - sparse_categorical_accuracy: 0.6526Epoch 00015: val_loss improved from 1.40302 to 1.38009, saving model to /home/bmcfee/working/chords/model_simple_ckpt.pkl
512/512 [==============================] - 203s - loss: 1.2211 - sparse_categorical_accuracy: 0.6524 - val_loss: 1.3801 - val_sparse_categorical_accuracy: 0.6166
Epoch 17/100
511/512 [============================>.] - ETA: 0s - loss: 1.1942 - sparse_categorical_accuracy: 0.6559Epoch 00016: val_loss improved from 1.38009 to 1.34212, saving model to /home/bmcfee/working/chords/model_simple_ckpt.pkl
512/512 [==============================] - 203s - loss: 1.1948 - sparse_categorical_accuracy: 0.6557 - val_loss: 1.3421 - val_sparse_categorical_accuracy: 0.6339
Epoch 18/100
511/512 [============================>.] - ETA: 0s - loss: 1.2306 - sparse_categorical_accuracy: 0.6448Epoch 00017: val_loss did not improve
512/512 [==============================] - 204s - loss: 1.2303 - sparse_categorical_accuracy: 0.6448 - val_loss: 1.3774 - val_sparse_categorical_accuracy: 0.6322
Epoch 19/100
511/512 [============================>.] - ETA: 0s - loss: 1.1876 - sparse_categorical_accuracy: 0.6553Epoch 00018: val_loss did not improve
512/512 [==============================] - 205s - loss: 1.1878 - sparse_categorical_accuracy: 0.6553 - val_loss: 1.4078 - val_sparse_categorical_accuracy: 0.6111
Epoch 20/100
511/512 [============================>.] - ETA: 0s - loss: 1.2100 - sparse_categorical_accuracy: 0.6510Epoch 00019: val_loss did not improve
512/512 [==============================] - 205s - loss: 1.2103 - sparse_categorical_accuracy: 0.6509 - val_loss: 1.4424 - val_sparse_categorical_accuracy: 0.6053
Epoch 21/100
511/512 [============================>.] - ETA: 0s - loss: 1.2000 - sparse_categorical_accuracy: 0.6548Epoch 00020: val_loss did not improve
512/512 [==============================] - 205s - loss: 1.2001 - sparse_categorical_accuracy: 0.6548 - val_loss: 1.5043 - val_sparse_categorical_accuracy: 0.6069
Epoch 22/100
511/512 [============================>.] - ETA: 0s - loss: 1.2099 - sparse_categorical_accuracy: 0.6525Epoch 00021: val_loss did not improve
512/512 [==============================] - 205s - loss: 1.2097 - sparse_categorical_accuracy: 0.6525 - val_loss: 1.3510 - val_sparse_categorical_accuracy: 0.6262
Epoch 23/100
511/512 [============================>.] - ETA: 0s - loss: 1.1905 - sparse_categorical_accuracy: 0.6555Epoch 00022: val_loss did not improve

Epoch 00022: reducing learning rate to 1.0000000474974514e-05.
512/512 [==============================] - 205s - loss: 1.1907 - sparse_categorical_accuracy: 0.6554 - val_loss: 1.4026 - val_sparse_categorical_accuracy: 0.6223
Epoch 24/100
511/512 [============================>.] - ETA: 0s - loss: 1.1946 - sparse_categorical_accuracy: 0.6547Epoch 00023: val_loss did not improve
512/512 [==============================] - 204s - loss: 1.1945 - sparse_categorical_accuracy: 0.6547 - val_loss: 1.3585 - val_sparse_categorical_accuracy: 0.6325
Epoch 25/100
511/512 [============================>.] - ETA: 0s - loss: 1.2222 - sparse_categorical_accuracy: 0.6469  ETA: 10s - loss: 1.221Epoch 00024: val_loss did not improve
512/512 [==============================] - 204s - loss: 1.2222 - sparse_categorical_accuracy: 0.6469 - val_loss: 1.5299 - val_sparse_categorical_accuracy: 0.6066
Epoch 26/100
511/512 [============================>.] - ETA: 0s - loss: 1.1967 - sparse_categorical_accuracy: 0.6559Epoch 00025: val_loss did not improve
512/512 [==============================] - 203s - loss: 1.1977 - sparse_categorical_accuracy: 0.6558 - val_loss: 1.3459 - val_sparse_categorical_accuracy: 0.6418
Epoch 27/100
511/512 [============================>.] - ETA: 0s - loss: 1.2198 - sparse_categorical_accuracy: 0.6528Epoch 00026: val_loss did not improve
512/512 [==============================] - 204s - loss: 1.2201 - sparse_categorical_accuracy: 0.6526 - val_loss: 1.3855 - val_sparse_categorical_accuracy: 0.6220
Epoch 28/100
511/512 [============================>.] - ETA: 0s - loss: 1.2129 - sparse_categorical_accuracy: 0.6488Epoch 00027: val_loss did not improve

Epoch 00027: reducing learning rate to 1.0000000656873453e-06.
512/512 [==============================] - 204s - loss: 1.2121 - sparse_categorical_accuracy: 0.6489 - val_loss: 1.3767 - val_sparse_categorical_accuracy: 0.6273
Epoch 29/100
511/512 [============================>.] - ETA: 0s - loss: 1.2090 - sparse_categorical_accuracy: 0.6520Epoch 00028: val_loss did not improve
512/512 [==============================] - 203s - loss: 1.2086 - sparse_categorical_accuracy: 0.6522 - val_loss: 1.4131 - val_sparse_categorical_accuracy: 0.6112
Epoch 30/100
511/512 [============================>.] - ETA: 0s - loss: 1.1854 - sparse_categorical_accuracy: 0.6585Epoch 00029: val_loss did not improve
512/512 [==============================] - 203s - loss: 1.1855 - sparse_categorical_accuracy: 0.6584 - val_loss: 1.4447 - val_sparse_categorical_accuracy: 0.6199
Epoch 31/100
511/512 [============================>.] - ETA: 0s - loss: 1.1973 - sparse_categorical_accuracy: 0.6518Epoch 00030: val_loss did not improve
512/512 [==============================] - 204s - loss: 1.1977 - sparse_categorical_accuracy: 0.6517 - val_loss: 1.4080 - val_sparse_categorical_accuracy: 0.6175
Epoch 32/100
511/512 [============================>.] - ETA: 0s - loss: 1.1917 - sparse_categorical_accuracy: 0.6510Epoch 00031: val_loss improved from 1.34212 to 1.33430, saving model to /home/bmcfee/working/chords/model_simple_ckpt.pkl
512/512 [==============================] - 204s - loss: 1.1917 - sparse_categorical_accuracy: 0.6511 - val_loss: 1.3343 - val_sparse_categorical_accuracy: 0.6367
Epoch 33/100
511/512 [============================>.] - ETA: 0s - loss: 1.1942 - sparse_categorical_accuracy: 0.6527Epoch 00032: val_loss did not improve
512/512 [==============================] - 204s - loss: 1.1940 - sparse_categorical_accuracy: 0.6528 - val_loss: 1.3475 - val_sparse_categorical_accuracy: 0.6312
Epoch 34/100
511/512 [============================>.] - ETA: 0s - loss: 1.2036 - sparse_categorical_accuracy: 0.6540Epoch 00033: val_loss did not improve
512/512 [==============================] - 205s - loss: 1.2035 - sparse_categorical_accuracy: 0.6540 - val_loss: 1.4254 - val_sparse_categorical_accuracy: 0.6215
Epoch 35/100
511/512 [============================>.] - ETA: 0s - loss: 1.1946 - sparse_categorical_accuracy: 0.6515Epoch 00034: val_loss did not improve
512/512 [==============================] - 203s - loss: 1.1948 - sparse_categorical_accuracy: 0.6515 - val_loss: 1.4776 - val_sparse_categorical_accuracy: 0.5996
Epoch 36/100
511/512 [============================>.] - ETA: 0s - loss: 1.1822 - sparse_categorical_accuracy: 0.6582Epoch 00035: val_loss did not improve
512/512 [==============================] - 205s - loss: 1.1819 - sparse_categorical_accuracy: 0.6583 - val_loss: 1.3706 - val_sparse_categorical_accuracy: 0.6280
Epoch 37/100
511/512 [============================>.] - ETA: 0s - loss: 1.1899 - sparse_categorical_accuracy: 0.6533Epoch 00036: val_loss did not improve
512/512 [==============================] - 198s - loss: 1.1900 - sparse_categorical_accuracy: 0.6533 - val_loss: 1.4072 - val_sparse_categorical_accuracy: 0.6092
Epoch 38/100
511/512 [============================>.] - ETA: 0s - loss: 1.1924 - sparse_categorical_accuracy: 0.6565Epoch 00037: val_loss improved from 1.33430 to 1.33189, saving model to /home/bmcfee/working/chords/model_simple_ckpt.pkl
512/512 [==============================] - 196s - loss: 1.1922 - sparse_categorical_accuracy: 0.6566 - val_loss: 1.3319 - val_sparse_categorical_accuracy: 0.6353
Epoch 39/100
511/512 [============================>.] - ETA: 0s - loss: 1.1902 - sparse_categorical_accuracy: 0.6544Epoch 00038: val_loss did not improve
512/512 [==============================] - 196s - loss: 1.1905 - sparse_categorical_accuracy: 0.6542 - val_loss: 1.4282 - val_sparse_categorical_accuracy: 0.6131
Epoch 40/100
511/512 [============================>.] - ETA: 0s - loss: 1.1865 - sparse_categorical_accuracy: 0.6548Epoch 00039: val_loss did not improve
512/512 [==============================] - 196s - loss: 1.1864 - sparse_categorical_accuracy: 0.6548 - val_loss: 1.3920 - val_sparse_categorical_accuracy: 0.6238
Epoch 41/100
511/512 [============================>.] - ETA: 0s - loss: 1.1910 - sparse_categorical_accuracy: 0.6541Epoch 00040: val_loss did not improve
512/512 [==============================] - 195s - loss: 1.1907 - sparse_categorical_accuracy: 0.6543 - val_loss: 1.4106 - val_sparse_categorical_accuracy: 0.6186
Epoch 42/100
511/512 [============================>.] - ETA: 0s - loss: 1.2025 - sparse_categorical_accuracy: 0.6520Epoch 00041: val_loss did not improve
512/512 [==============================] - 196s - loss: 1.2027 - sparse_categorical_accuracy: 0.6520 - val_loss: 1.4036 - val_sparse_categorical_accuracy: 0.6315
Epoch 43/100
511/512 [============================>.] - ETA: 0s - loss: 1.2049 - sparse_categorical_accuracy: 0.6489Epoch 00044: val_loss did not improve
512/512 [==============================] - 195s - loss: 1.2047 - sparse_categorical_accuracy: 0.6490 - val_loss: 1.4590 - val_sparse_categorical_accuracy: 0.6125
Epoch 46/100
511/512 [============================>.] - ETA: 0s - loss: 1.1868 - sparse_categorical_accuracy: 0.6547Epoch 00045: val_loss did not improve
512/512 [==============================] - 196s - loss: 1.1864 - sparse_categorical_accuracy: 0.6549 - val_loss: 1.3915 - val_sparse_categorical_accuracy: 0.6169
Epoch 47/100
511/512 [============================>.] - ETA: 0s - loss: 1.2239 - sparse_categorical_accuracy: 0.6461Epoch 00046: val_loss did not improve
512/512 [==============================] - 197s - loss: 1.2240 - sparse_categorical_accuracy: 0.6461 - val_loss: 1.4135 - val_sparse_categorical_accuracy: 0.6273
Epoch 48/100
511/512 [============================>.] - ETA: 0s - loss: 1.2218 - sparse_categorical_accuracy: 0.6455Epoch 00047: val_loss did not improve
512/512 [==============================] - 197s - loss: 1.2217 - sparse_categorical_accuracy: 0.6455 - val_loss: 1.3927 - val_sparse_categorical_accuracy: 0.6255
Epoch 49/100
511/512 [============================>.] - ETA: 0s - loss: 1.2060 - sparse_categorical_accuracy: 0.6557Epoch 00048: val_loss did not improve

Epoch 00048: reducing learning rate to 1.000000082740371e-08.
512/512 [==============================] - 197s - loss: 1.2063 - sparse_categorical_accuracy: 0.6556 - val_loss: 1.3505 - val_sparse_categorical_accuracy: 0.6291
Epoch 50/100
511/512 [============================>.] - ETA: 0s - loss: 1.1848 - sparse_categorical_accuracy: 0.6566Epoch 00049: val_loss did not improve
512/512 [==============================] - 198s - loss: 1.1841 - sparse_categorical_accuracy: 0.6568 - val_loss: 1.3940 - val_sparse_categorical_accuracy: 0.6271
Epoch 51/100
511/512 [============================>.] - ETA: 0s - loss: 1.1916 - sparse_categorical_accuracy: 0.6561Epoch 00050: val_loss did not improve
512/512 [==============================] - 197s - loss: 1.1917 - sparse_categorical_accuracy: 0.6561 - val_loss: 1.3978 - val_sparse_categorical_accuracy: 0.6262
Epoch 52/100
511/512 [============================>.] - ETA: 0s - loss: 1.1752 - sparse_categorical_accuracy: 0.6595Epoch 00051: val_loss did not improve
512/512 [==============================] - 197s - loss: 1.1748 - sparse_categorical_accuracy: 0.6596 - val_loss: 1.3867 - val_sparse_categorical_accuracy: 0.6162
Epoch 53/100
511/512 [============================>.] - ETA: 0s - loss: 1.1758 - sparse_categorical_accuracy: 0.6567Epoch 00052: val_loss did not improve
512/512 [==============================] - 197s - loss: 1.1758 - sparse_categorical_accuracy: 0.6567 - val_loss: 1.3768 - val_sparse_categorical_accuracy: 0.6177
Epoch 54/100
511/512 [============================>.] - ETA: 0s - loss: 1.1870 - sparse_categorical_accuracy: 0.6539Epoch 00053: val_loss did not improve

Epoch 00053: reducing learning rate to 1.000000082740371e-09.
512/512 [==============================] - 197s - loss: 1.1874 - sparse_categorical_accuracy: 0.6539 - val_loss: 1.4012 - val_sparse_categorical_accuracy: 0.6157
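
Training ends here via early stopping: the best validation loss (1.33189, at epoch 00037) went unimproved for the full 15-epoch patience window.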

In [128]:
model.load_weights('/home/bmcfee/working/chords/model_simple_ckpt.pkl')

Diagnostics


In [129]:
import pandas as pd

In [130]:
import jams.display
import matplotlib.pyplot as plt
%matplotlib nbagg
import librosa.display

In [131]:
# model.history is populated by the fit_generator run above
history = pd.DataFrame.from_dict(model.history.history)
plt.figure()

plt.plot(history['sparse_categorical_accuracy'], label='Training accuracy')
plt.plot(history['val_sparse_categorical_accuracy'], label='Validation accuracy')
plt.legend(loc='best')


Out[131]:
<matplotlib.legend.Legend at 0x7fb43eb7c6a0>

In [132]:
# Kernel of the first (5x5) convolution; weights[0:4] are the BatchNorm parameters
l1 = model.get_weights()[4]

In [133]:
# Plot each (input channel, filter) pair of the first conv kernel
plt.figure(figsize=2 * np.asarray(l1.shape[2:]))

pi = 0
for f in range(l1.shape[3]):
    for c in range(l1.shape[2]):
        pi += 1
        plt.subplot(l1.shape[3], l1.shape[2], pi)
        librosa.display.specshow(l1[:, :, c, f].T)#, vmin=l1.min(), vmax=l1.max())
        
        
plt.tight_layout()



In [134]:
# Kernel of the 1x216 convolution, squeezed to (frequency, filters)
# and sorted by peak frequency for display
l2 = model.get_weights()[6].squeeze()
l2 = librosa.util.axis_sort(l2)

In [135]:
plt.figure(figsize=(8,4))
librosa.display.specshow(l2, y_axis='cqt_note', sr=pump.ops[0].sr, bins_per_octave=36)
plt.tight_layout()



Validation viz


In [136]:
import pandas as pd
import jams
import librosa

from tqdm import tqdm_notebook as tqdm

from IPython.display import Audio

In [137]:
def score_model(pump, model, idx,
                features='/home/bmcfee/working/chords/pump',
                refs='/home/bmcfee/data/eric_chords/references_v2/'):
    '''Evaluate the model on each track in idx with mir_eval's chord metrics.'''
    
    results = {}
    for item in tqdm(idx.index):
        jam = jams.load('{}/{}.jams'.format(refs, item), validate=False)
        datum = np.load('{}/{}.npz'.format(features, item))['cqt/mag']
        
        # Decode frame-wise predictions back into a jams chord annotation
        ann = pump.ops[1].inverse(model.predict(datum)[0])
        results[item] = jams.eval.chord(jam.annotations['chord', 0], ann)
        
    return pd.DataFrame.from_dict(results, orient='index')

In [138]:
df = score_model(pump, model, idx_test)




In [139]:
dfr = df[['thirds', 'triads', 'tetrads', 'root', 'mirex', 'majmin', 'sevenths']]

In [140]:
dfr.describe()


Out[140]:
           thirds      triads     tetrads        root       mirex      majmin    sevenths
count  223.000000  223.000000  223.000000  223.000000  223.000000  223.000000  223.000000
mean     0.770473    0.729437    0.589792    0.804300    0.785739    0.784287    0.646950
std      0.144725    0.184276    0.207877    0.117333    0.140484    0.147774    0.194997
min      0.060454    0.041885    0.022028    0.063522    0.060454    0.061550    0.022439
25%      0.694246    0.642637    0.467216    0.728655    0.727393    0.716817    0.545648
50%      0.795132    0.781245    0.617723    0.823463    0.814156    0.821630    0.685530
75%      0.876996    0.860771    0.744916    0.888262    0.880772    0.892760    0.781547
max      0.985654    0.985654    0.952621    0.985654    0.985654    0.985654    0.971140
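
Each column is a mir_eval chord metric (duration-weighted accuracy, summarized here over the 223 test tracks): root compares roots only; thirds, triads, and tetrads require agreement on progressively more of the chord structure; majmin and sevenths score against reduced major/minor (plus sevenths) vocabularies; mirex counts a frame as correct when the two chords share at least three pitch classes.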

In [104]:
dfr.describe()


Out[104]:
           thirds      triads     tetrads        root       mirex      majmin    sevenths
count  223.000000  223.000000  223.000000  223.000000  223.000000  223.000000  223.000000
mean     0.774634    0.732693    0.601301    0.808049    0.795286    0.788082    0.658705
std      0.146957    0.188047    0.204042    0.115627    0.132422    0.151665    0.188492
min      0.007165    0.007165    0.007165    0.069345    0.082691    0.007165    0.007165
25%      0.698890    0.657726    0.470457    0.750213    0.738901    0.720950    0.560315
50%      0.807144    0.779519    0.637144    0.825770    0.822403    0.822064    0.698600
75%      0.879131    0.864386    0.750019    0.892497    0.882436    0.898903    0.786822
max      0.985266    0.985266    0.915041    0.985266    0.985266    0.985266    0.948172

In [28]:
dfr.describe()


Out[28]:
           thirds      triads     tetrads        root       mirex      majmin    sevenths
count  223.000000  223.000000  223.000000  223.000000  223.000000  223.000000  223.000000
mean     0.772300    0.729748    0.589129    0.806987    0.788497    0.785096    0.646118
std      0.148328    0.189070    0.203291    0.118855    0.140169    0.154517    0.189233
min      0.074984    0.039769    0.024978    0.099309    0.072262    0.072262    0.024978
25%      0.704841    0.636315    0.466297    0.742035    0.734978    0.727577    0.539258
50%      0.805573    0.778919    0.614245    0.828047    0.815798    0.812942    0.693661
75%      0.880851    0.866131    0.732395    0.892318    0.884311    0.898266    0.769953
max      0.985322    0.985322    0.917291    0.985322    0.985322    0.985322    0.960053

In [111]:
dfr.describe()


Out[111]:
           thirds      triads     tetrads        root       mirex      majmin    sevenths
count  223.000000  223.000000  223.000000  223.000000  223.000000  223.000000  223.000000
mean     0.766501    0.727001    0.582575    0.805127    0.793788    0.780656    0.636473
std      0.150797    0.188267    0.214849    0.120718    0.136653    0.153669    0.203488
min      0.077030    0.035399    0.009879    0.096665    0.094359    0.064323    0.018820
25%      0.693837    0.641354    0.443133    0.745414    0.727854    0.705884    0.524820
50%      0.792074    0.770722    0.626143    0.828113    0.824786    0.811991    0.684789
75%      0.883529    0.858151    0.744555    0.885459    0.889261    0.890280    0.780201
max      0.984445    0.984445    0.920421    0.984445    0.984445    0.984445    0.979995

In [40]:
dfr.describe()


Out[40]:
           thirds      triads     tetrads        root       mirex      majmin    sevenths
count  223.000000  223.000000  223.000000  223.000000  223.000000  223.000000  223.000000
mean     0.769563    0.727529    0.590000    0.804221    0.794089    0.783547    0.646078
std      0.148312    0.187265    0.202314    0.120829    0.136100    0.154343    0.189277
min      0.049119    0.034102    0.026934    0.060540    0.049119    0.050010    0.026934
25%      0.680561    0.636185    0.463364    0.734355    0.735562    0.709487    0.541688
50%      0.798823    0.777461    0.610777    0.828335    0.815728    0.818413    0.686474
75%      0.882994    0.862762    0.739739    0.901487    0.883233    0.902220    0.785684
max      0.991141    0.991141    0.916893    0.991141    0.991141    0.991141    0.964697

In [141]:
plt.figure()
dfr.boxplot();



In [142]:
dfr.describe().loc['mean']


Out[142]:
thirds      0.770473
triads      0.729437
tetrads     0.589792
root        0.804300
mirex       0.785739
majmin      0.784287
sevenths    0.646950
Name: mean, dtype: float64

In [105]:
dfr.describe().loc['mean']


Out[105]:
thirds      0.774634
triads      0.732693
tetrads     0.601301
root        0.808049
mirex       0.795286
majmin      0.788082
sevenths    0.658705
Name: mean, dtype: float64

In [30]:
dfr.describe().loc['mean']


Out[30]:
thirds      0.772300
triads      0.729748
tetrads     0.589129
root        0.806987
mirex       0.788497
majmin      0.785096
sevenths    0.646118
Name: mean, dtype: float64

In [113]:
dfr.describe().loc['mean']


Out[113]:
thirds      0.766501
triads      0.727001
tetrads     0.582575
root        0.805127
mirex       0.793788
majmin      0.780656
sevenths    0.636473
Name: mean, dtype: float64

In [42]:
dfr.describe().loc['mean']


Out[42]:
thirds      0.769563
triads      0.727529
tetrads     0.590000
root        0.804221
mirex       0.794089
majmin      0.783547
sevenths    0.646078
Name: mean, dtype: float64

In [43]:
F = idx_val.index[99]

In [106]:
F = df['mirex'].argmin()

In [114]:
F = 'TRWMAJH127F8F16AFF'

In [115]:
F


Out[115]:
'TRWMAJH127F8F16AFF'

In [116]:
datum = np.load('/home/bmcfee/working/chords/pump/{}.npz'.format(F))

In [117]:
J = jams.load('/home/bmcfee/data/eric_chords/references_v2/{}.jams'.format(F))

In [118]:
# Recover the reference chord annotation from the encoded targets
ann_true = pump['chord_tag'].inverse(datum['chord_tag/chord'][0])

In [119]:
# Decode the model's frame-wise predictions into a chord annotation
ann = pump['chord_tag'].inverse(model.predict(datum['cqt/mag'])[0])

In [120]:
pd.DataFrame.from_records([jams.eval.chord(J.annotations['chord', 0], ann)]).loc[0]


Out[120]:
thirds          0.312464
thirds_inv      0.312464
triads          0.031507
triads_inv      0.031507
tetrads         0.031507
tetrads_inv     0.031507
root            0.374850
mirex           0.155307
majmin          0.057251
majmin_inv      0.057251
sevenths        0.057251
sevenths_inv    0.057251
Name: 0, dtype: float64
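
This track sits near the bottom of the mirex distribution (0.155, against a corpus mean of roughly 0.79), which is why it was singled out for inspection above.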

In [121]:
plt.figure(figsize=(10, 8))

ax = plt.subplot(2,1,1)
librosa.display.specshow(datum['cqt/mag'][0, :, :, 0].T,
                         sr=pump.ops[0].sr,
                         hop_length=pump.ops[0].hop_length,
                         x_axis='time')

plt.subplot(2,1,2, sharex=ax)
jams.display.display(ann_true, meta=False, label='Reference', alpha=0.5)
jams.display.display(ann, meta=False, label='Estimate', alpha=0.5)
plt.legend()
plt.tight_layout()



In [79]:
Audio(filename='/home/bmcfee/data/eric_chords/audio/{}.mp3'.format(F))


IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

In [ ]:
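# Reload the audio at a reduced sample rate; 8 kHz is enough for an A/B chord check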
y, sr = librosa.load('/home/bmcfee/data/eric_chords/audio/{}.mp3'.format(F), sr=8000)

In [ ]:
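# Stereo A/B comparison: original audio in one channel, sonified chord estimate in the other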
Audio(data=np.vstack([y, jams.sonify.sonify(ann, sr=sr, duration=int(np.ceil(len(y) / sr)))[:len(y)]]),
      rate=sr)

In [ ]: